Differential

Preamble

Dependencies

Code
library(edgeR)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scuttle)
library(pheatmap)
library(patchwork)
library(ggbeeswarm)
library(EnhancedVolcano)
library(SingleCellExperiment)

Loading

Code
sce <- readRDS(file.path("..", "outs", "01-sce.rds"))

Utils

Code
.volcano <- \(df, title, fdr = 0.05, lfc = 1) {
  EnhancedVolcano(df, 
    x = "logFC", y = "FDR",
    FCcutoff = lfc, pCutoff = fdr,
    pointSize = 1, raster = TRUE,
    title = title, subtitle = NULL,
    lab = df[["gene"]], labSize = 2, 
    drawConnectors = TRUE, widthConnectors = 0.5) +
  guides(col = guide_legend(override.aes = list(alpha = 1, size = 3))) +
  theme_bw(9) + theme(
    aspect.ratio = 1,
    legend.title = element_blank(),
    panel.grid.minor = element_blank())
}

Analysis

Code
# exclude LN for DGE analysis
sub <- sce[, sce$TissueSub != "LN"]
df <- data.frame(colData(sub), t(logcounts(sub)), check.names = FALSE)
gg <- pivot_longer(df, any_of(rownames(sub)), names_to = "gene", values_to = "expr")
Code
# setup subtype groupings
names(kid) <- kid <- c("Kidney", "RCC")
names(lun) <- lun <- c("Alveoles", "LSCC")
names(tls) <- tls <- c("E_TLS", "SFL_TLS", "PFL_TLS", "Tcell_TLS")
# split by tumor type
dat <- c(list(
    RCC = sub[, sub$TumorType == "RCC"], 
    LSCC = sub[, sub$TumorType == "LSCC"]),
    lapply(tls, \(.) sub[, sub$TissueSub == .]))
Code
ref <- c(
    RCC = unname(kid[1]), 
    LSCC = unname(lun[1]), 
    sapply(tls, \(.) "LSCC"))
names(ids) <- ids <- names(dat)
dat <- dat[ids]; ref <- ref[ids]
fit <- lapply(ids, \(.) {
    # setup design matrix
    df <- data.frame(colData(dat[[.]]))
    if (. %in% tls) {
        tt <- factor(df$TumorType)
        df$TumorType <- relevel(tt, ref[.])
        mm <- model.matrix(~0+TumorType, df)
        colnames(mm) <- gsub("TumorType", "", colnames(mm))
    } else {
        st <- droplevels(df$TissueSub)
        df$TissueSub <- relevel(st, ref[.])
        mm <- model.matrix(~0+TissueSub, df)
        colnames(mm) <- gsub("TissueSub", "", colnames(mm))
    }
    # fit GLM model
    dgl <- DGEList(assay(dat[[.]]))
    dgl <- calcNormFactors(dgl)
    dgl <- estimateDisp(dgl, mm)
    fit <- glmQLFit(dgl, mm)
})
Code
# setup contrasts
cs <- c(list(
    # within tumor types
    RCC = c(
        # normal/tumor vs. any TLS
        list(TLS = list(kid, tls)), 
        # normal/tumor vs. specific TLS
        lapply(tls, \(t) list(kid, t)),
        # TLS maturation stages
        list(
            SFL = list("E_TLS", "SFL_TLS"),
            Tcell = list("E_TLS", "Tcell_TLS"))),
    LSCC = c(
        # normal/tumor vs. any TLS
        list(TLS = list(lun, tls)), 
        # normal/tumor vs. specific TLS
        lapply(tls, \(t) list(lun, t)),
        # TLS maturation stages
        list(
            SFL = list("E_TLS", "SFL_TLS"),
            Tcell = list("E_TLS", "Tcell_TLS")))),
    # across tumor types, within TLS subsets
    lapply(tls, \(.) setNames(list("RCC"), .)))
cs <- lapply(ids, \(.) {
    x <- numeric(ncol(mm <- fit[[.]]$design))
    lapply(cs[[.]], \(c) {
        if (length(c) == 1) {
            i <- match(c, colnames(mm))
            x[i] <- 1; x[-i] <- -1
        } else {
            a <- match(c[[1]], colnames(mm))
            b <- match(c[[2]], colnames(mm))
            x[a] <- -1/sum(a != 0)
            x[b] <- 1/sum(b != 0)
        }
        return(x)
    })
})
Code
# run DGE analysis
res <- lapply(ids, \(.) {
    lapply(names(cs[[.]]), \(c) {
        ht <- glmQLFTest(fit[[.]], contrast = cs[[.]][[c]])
        tt <- topTags(ht, n = Inf)$table
        data.frame(row.names = NULL,
            gene = rownames(tt), tt,
            contrast = c, subset = .)
    }) |> do.call(what = rbind)
}) |> do.call(what = rbind)
rownames(res) <- NULL

Visualization

Volcano

Code
ps <- lapply(setdiff(ids, tls), \(.) {
    df <- res[res$subset == . & res$contrast == "TLS", ]
    .volcano(df, title = ., fdr = 1e-4, lfc = 1.25)
})
wrap_plots(ps, nrow = 1) + 
  plot_layout(guides = "collect") &
  theme(legend.position = "top")

Code
ps <- lapply(tls, \(.) {
    df <- res[res$subset == . & grepl("TLS", res$contrast), ]
    .volcano(df, title = ., fdr = 1e-3, lfc = 0.5)
})
wrap_plots(ps, nrow = 2) + 
  plot_layout(guides = "collect") &
  theme(legend.position = "top")

Code
df <- res[res$contrast %in% c("SFL", "Tcell"), ]
ps <- lapply(unique(df$subset), \(s) {
    lapply(unique(df$contrast), \(c) {
        df <- res[res$subset == s & res$contrast == c, ]
        .volcano(df, fdr = 1e-4, lfc = 1.25, title = paste(s, c, sep = ","))
    })
})
wrap_plots(Reduce(c, ps), nrow = 2) + 
  plot_layout(guides = "collect") &
  theme(legend.position = "top")

Heatmap

Code
top <- res %>%
    filter(!subset %in% tls) %>%
    group_by(subset) %>%
    filter(FDR < 0.1, contrast == "TLS") %>% 
    slice_max(abs(logFC), n = 40) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    mtx <- logcounts(sub)[top[[.]]$gene, sub$TumorType == .]
    cd <- data.frame(colData(sub))[c("TissueSub")]
    hm <- pheatmap(mtx, 
        main = ., fontsize = 6,
        col = rev(hcl.colors(51, "RdBu")),
        scale = "row", show_colnames = FALSE, annotation_col = cd)
    print(hm); cat("\n\n")
}

LSCC

RCC

Code
top <- res %>%
    group_by(subset) %>%
    filter(!grepl("TLS", contrast)) %>%
    slice_max(abs(logFC), n = 40) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    idx <- sub$TumorType == . & sub$TissueSub %in% c(
        "E_TLS", paste0(unique(top[[.]]$contrast), "_TLS"))
    mtx <- logcounts(sub)[top[[.]]$gene, idx]
    cd <- data.frame(colData(sub))[c("TissueSub")]
    hm <- pheatmap(mtx, 
        main = ., fontsize = 6,
        col = rev(hcl.colors(51, "RdBu")),
        scale = "row", show_colnames = FALSE, annotation_col = cd)
    print(hm); cat("\n\n")
}

LSCC

RCC

Boxplot

Code
top <- res %>%
    filter(!subset %in% tls) %>%
    group_by(subset) %>%
    filter(FDR < 0.05, contrast == "TLS") %>% 
    slice_max(abs(logFC), n = 25) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    plt <- ggplot(
        filter(gg, 
            TumorType == .,
            gene %in% top[[.]]$gene),
        aes(TissueSub, expr, fill = TissueSub)) +
        facet_wrap(~ gene, scales = "free_y") +
        geom_boxplot(size = 0.1, fill = NA, outlier.color = NA, show.legend = FALSE) + 
        geom_beeswarm(shape = 21, col = "black", stroke = 0.1, size = 1.2, alpha = 0.8) + 
        guides(fill = guide_legend(override.aes = list(size = 3, alpha = 1))) +
        labs(x = NULL, y = "Expression (logCPM)") +
        scale_fill_brewer(palette = "Set2") +
        theme_linedraw(9) + theme(
            panel.grid = element_blank(),
            axis.text.x = element_blank(),
            axis.ticks.x = element_blank(),
            strip.background = element_blank(),
            legend.key.size = unit(0.5, "lines"),
            strip.text = element_text(color = "black", face = "bold"))
    print(plt); cat("\n\n")
}

LSCC

RCC

Code
top <- res %>%
    filter(subset %in% tls) %>%
    group_by(subset) %>%
    #filter(FDR < 0.05, contrast == "TLS") %>% 
    slice_max(abs(logFC), n = 25) %>% 
    split(.$subset)
for (. in names(top)) {
    cat("####", ., "\n")
    plt <- ggplot(
        filter(gg, 
            TissueSub == .,
            gene %in% top[[.]]$gene),
        aes(TumorType, expr, fill = TumorType)) +
        facet_wrap(~ gene, scales = "free_y") +
        geom_boxplot(size = 0.1, fill = NA, outlier.color = NA, show.legend = FALSE) + 
        geom_beeswarm(shape = 21, col = "black", stroke = 0.1, size = 1.2, alpha = 0.8) + 
        guides(fill = guide_legend(override.aes = list(size = 3, alpha = 1))) +
        labs(x = NULL, y = "Expression (logCPM)") +
        scale_fill_brewer(palette = "Set2") +
        theme_linedraw(9) + theme(
            panel.grid = element_blank(),
            axis.text.x = element_blank(),
            axis.ticks.x = element_blank(),
            strip.background = element_blank(),
            legend.key.size = unit(0.5, "lines"),
            strip.text = element_text(color = "black", face = "bold"))
    print(plt); cat("\n\n")
}

E_TLS

PFL_TLS

SFL_TLS

Tcell_TLS

Appendix

Code
saveRDS(res, file.path("..", "outs", "02-dge.rds"))
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.2.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] EnhancedVolcano_1.18.0      ggrepel_0.9.3              
 [3] ggbeeswarm_0.7.2            patchwork_1.1.2            
 [5] pheatmap_1.0.12             scuttle_1.10.1             
 [7] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.1
 [9] Biobase_2.60.0              GenomicRanges_1.52.0       
[11] GenomeInfoDb_1.36.0         IRanges_2.34.0             
[13] S4Vectors_0.38.1            BiocGenerics_0.46.0        
[15] MatrixGenerics_1.12.0       matrixStats_0.63.0         
[17] ggplot2_3.4.2               tidyr_1.3.0                
[19] dplyr_1.1.2                 edgeR_3.42.2               
[21] limma_3.56.1               

loaded via a namespace (and not attached):
 [1] beeswarm_0.4.0            gtable_0.3.3             
 [3] xfun_0.39                 htmlwidgets_1.6.2        
 [5] lattice_0.21-8            Cairo_1.6-0              
 [7] vctrs_0.6.2               tools_4.3.0              
 [9] bitops_1.0-7              generics_0.1.3           
[11] parallel_4.3.0            tibble_3.2.1             
[13] fansi_1.0.4               pkgconfig_2.0.3          
[15] Matrix_1.5-4.1            RColorBrewer_1.1-3       
[17] sparseMatrixStats_1.12.0  lifecycle_1.0.3          
[19] GenomeInfoDbData_1.2.10   farver_2.1.1             
[21] compiler_4.3.0            munsell_0.5.0            
[23] codetools_0.2-19          vipor_0.4.5              
[25] htmltools_0.5.5           RCurl_1.98-1.12          
[27] yaml_2.3.7                pillar_1.9.0             
[29] crayon_1.5.2              BiocParallel_1.34.2      
[31] DelayedArray_0.26.3       tidyselect_1.2.0         
[33] locfit_1.5-9.7            digest_0.6.31            
[35] purrr_1.0.1               labeling_0.4.2           
[37] splines_4.3.0             fastmap_1.1.1            
[39] grid_4.3.0                colorspace_2.1-0         
[41] cli_3.6.1                 magrittr_2.0.3           
[43] S4Arrays_1.0.4            utf8_1.2.3               
[45] withr_2.5.0               DelayedMatrixStats_1.22.0
[47] scales_1.2.1              rmarkdown_2.21           
[49] XVector_0.40.0            beachmat_2.16.0          
[51] evaluate_0.21             ggrastr_1.0.1            
[53] knitr_1.42                rlang_1.1.1              
[55] Rcpp_1.0.10               glue_1.6.2               
[57] rstudioapi_0.14           jsonlite_1.8.4           
[59] R6_2.5.1                  zlibbioc_1.46.0